load libs

library(splines)
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(DBI)

Load data

# Please uncomment the following lines if you do not have nhanes.sqlite

# fileURL <- "https://github.com/ccb-hms/Imputation/blob/main/nhanes.sqlite"
# if(!file.exists("nhanes.sqlite")){
#     res <- tryCatch(download.file(fileURL,
#                               destfile="./nhanes.sqlite",
#                               method="auto"),
#                 error=function(e) 1)
# }
# Fix the RNG state so the random train/test split further down is
# reproducible.
set.seed(123)

# Open the local SQLite file nhanes.sqlite.
nhanes_db <- dbConnect(drv = RSQLite::SQLite(), dbname = "nhanes.sqlite")

# Show which tables the database contains.
dbListTables(conn = nhanes_db)
## [1] "blood_cholesterol"     "body_measures"         "current_health_status"
## [4] "demo"                  "diabetes"              "diet_total"           
## [7] "medical_conditions"    "merged_table"          "var_decr"
# Columns to pull from merged_table. The trailing space separates the column
# list from the FROM keyword when the query text is assembled.
cols <- "BMXWAIST , RIDAGEYR, BMXHT, BMXWT, BMXBMI, RIAGENDR, years, DR1TM161, WTDRD1, BMXLEG, BMXARML "
data_sql <- sprintf("SELECT %sFROM merged_table", cols)

# Second listing of the tables (same call as earlier in this section).
dbListTables(nhanes_db)
## [1] "blood_cholesterol"     "body_measures"         "current_health_status"
## [4] "demo"                  "diabetes"              "diet_total"           
## [7] "medical_conditions"    "merged_table"          "var_decr"
# Run the query and keep only rows with no missing values in any selected
# column.
data <- dbGetQuery(nhanes_db, data_sql)
data <- na.omit(data)

dbDisconnect(nhanes_db)


# Draw non-overlapping random index sets: 5000 rows for training, then 3000
# of the remaining rows for testing. seq_len() replaces 1:nrow(data) so that
# an empty table gives an empty index vector instead of c(1, 0); for a
# non-empty table the sampled indices are the same as before.
train_ix <- sample(x = seq_len(nrow(data)), size = 5000)
test_ix <- sample(x = setdiff(seq_len(nrow(data)), train_ix), size = 3000)

train_data <- data[train_ix, ]
test_data <- data[test_ix, ]

Inverse Normal Distribution

# Rank-based inverse normal transform: each value's rank is converted to the
# position (rank - 3/8) / (n + 1 - 6/8) and mapped through qnorm().
invNorm <- function(x) {
  n_obs <- length(x)
  position <- (rank(x) - 3/8) / (n_obs + 1 - 6/8)
  qnorm(position)
}

# Mean squared error between two numeric vectors, rounded to 4 decimals.
mean_square_error <- function(y_true, y_pred) {
  squared_err <- (y_true - y_pred)^2
  round(mean(squared_err), digits = 4)
}

# Draw a kernel density estimate of `data`, titled "<data_name> Density", and
# fill the area under the curve with `col` (blue outline).
plot_density <- function(data, data_name, col = "red") {
  dens <- density(data)
  plot_title <- paste(data_name, "Density")
  plot(dens, main = plot_title)
  polygon(dens, col = col, border = "blue")
}

Data exploration

# Scatter plots of BMXWAIST against BMXHT, RIDAGEYR and BMXWT, coloured by
# RIAGENDR, with points drawn at a fixed alpha of 0.1.
# qplot() is deprecated as of ggplot2 3.4.0, so each plot is built with the
# equivalent ggplot() + geom_point() call.
ggplot(data, aes(x = BMXHT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = RIDAGEYR, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = BMXWT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

regression models

# Predictor-only copy of the test set: the response column BMXWAIST is dropped.
test_df <- select(test_data, -BMXWAIST)



# Fit a linear model, print its summary, and plot its test-set predictions.
#
# Arguments:
#   formula_str     Model formula as a string, e.g. "BMXWAIST ~ BMXWT".
#   train_data_set  Data frame the model is fitted on.
#   test_data_set   Data frame of predictors handed to predict().
#   truth_data_set  Data frame, row-aligned with test_data_set, supplying the
#                   observed BMXWAIST plus BMXWT and RIAGENDR for the plot.
#                   Defaults to the global test_data, which is what the
#                   function previously read unconditionally.
#
# Returns a ggplot object: observed waist values in black, predictions
# coloured by sex, both against weight, with the test MSE in the title.
run_model <- function(formula_str, train_data_set = train_data,
                      test_data_set = test_df,
                      truth_data_set = test_data) {
  # Predictions and observed values are paired row by row below.
  stopifnot(nrow(test_data_set) == nrow(truth_data_set))

  # Fit the regression and show the coefficient table.
  lm_reg <- lm(formula = as.formula(formula_str), data = train_data_set)
  print(summary(lm_reg))

  # Predict on the data set that was actually passed in. Previously the
  # test_data_set argument was ignored and the global test_df was always used.
  # Standard errors were requested (via the partially matched `se = T`) but
  # never used, so only the fitted values are computed now.
  fitted_vals <- predict(lm_reg, newdata = test_data_set)

  # Collect predictions next to the observed values and plotting covariates.
  pred_df <- data.frame(
    fit = fitted_vals,
    weight = truth_data_set$BMXWT,
    sex = truth_data_set$RIAGENDR,
    label = truth_data_set$BMXWAIST
  )

  # Test-set mean squared error.
  mse <- mean_square_error(y_true = pred_df$label, y_pred = pred_df$fit)

  # alpha is a fixed setting, so it belongs outside aes(); inside aes() it was
  # treated as a mapped variable (rescaled and given its own legend entry).
  ggplot(pred_df, aes(x = weight, y = label)) +
    geom_point(colour = "black", alpha = 0.1) +
    geom_point(aes(y = fit, colour = sex), alpha = 0.1, size = 1.5) +
    ylab("waist circumference") +
    ggtitle(paste("MSE = ", mse))
}

regression models weight

# Baseline: waist circumference as a straight-line function of weight.
run_model("BMXWAIST ~ BMXWT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.3723  -4.8747  -0.1382   4.7755  23.5811 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 43.865314   0.412785   106.3   <2e-16 ***
## BMXWT        0.682412   0.004917   138.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.222 on 4998 degrees of freedom
## Multiple R-squared:  0.794,  Adjusted R-squared:  0.794 
## F-statistic: 1.926e+04 on 1 and 4998 DF,  p-value: < 2.2e-16

# Same single predictor, but weight enters through a B-spline basis with the
# bs() defaults (three basis terms in the summary).
run_model("BMXWAIST ~ bs(BMXWT)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.2327  -4.7743  -0.1098   4.7283  23.0092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  65.2516     0.6783   96.20   <2e-16 ***
## bs(BMXWT)1   49.4273     2.2759   21.72   <2e-16 ***
## bs(BMXWT)2   87.3240     2.5591   34.12   <2e-16 ***
## bs(BMXWT)3  107.2327     4.4082   24.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.16 on 4996 degrees of freedom
## Multiple R-squared:  0.7976, Adjusted R-squared:  0.7975 
## F-statistic:  6563 on 3 and 4996 DF,  p-value: < 2.2e-16

# 7-df B-spline in RIDAGEYR plus linear weight, sex and the categorical
# `years` term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.0289  -3.9078   0.0723   3.8558  21.5874 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           38.276240   0.630800  60.679  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.292606   0.929404  -0.315  0.75290    
## bs(RIDAGEYR, df = 7)2  1.475108   0.680592   2.167  0.03025 *  
## bs(RIDAGEYR, df = 7)3  2.018121   0.743094   2.716  0.00663 ** 
## bs(RIDAGEYR, df = 7)4  5.110992   0.644329   7.932 2.64e-15 ***
## bs(RIDAGEYR, df = 7)5  8.424950   0.839573  10.035  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 11.875666   0.914794  12.982  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.301733   1.023286  10.067  < 2e-16 ***
## BMXWT                  0.719066   0.004319 166.492  < 2e-16 ***
## RIAGENDRMale          -3.995553   0.177064 -22.566  < 2e-16 ***
## years2005-2006        -0.262251   0.366004  -0.717  0.47370    
## years2007-2008        -0.021495   0.348841  -0.062  0.95087    
## years2009-2010        -0.460441   0.342226  -1.345  0.17855    
## years2013-2014         0.124610   0.349292   0.357  0.72129    
## years2015-2016         0.781043   0.353866   2.207  0.02735 *  
## years2017-2018         0.673979   0.357586   1.885  0.05951 .  
## years2022-2012         0.002106   0.363684   0.006  0.99538    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.032 on 4983 degrees of freedom
## Multiple R-squared:  0.8568, Adjusted R-squared:  0.8563 
## F-statistic:  1863 on 16 and 4983 DF,  p-value: < 2.2e-16

# Same terms as the bs(RIDAGEYR, df = 7) model, with a natural spline (ns)
# for RIDAGEYR instead of a B-spline.
run_model("BMXWAIST ~ ns(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.0730  -3.9146   0.0451   3.8459  21.5567 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           38.13401    0.56302  67.731  < 2e-16 ***
## ns(RIDAGEYR, df = 7)1  2.14999    0.56011   3.839 0.000125 ***
## ns(RIDAGEYR, df = 7)2  2.50719    0.68061   3.684 0.000232 ***
## ns(RIDAGEYR, df = 7)3  4.99062    0.60665   8.227 2.44e-16 ***
## ns(RIDAGEYR, df = 7)4  6.73369    0.61603  10.931  < 2e-16 ***
## ns(RIDAGEYR, df = 7)5 10.17415    0.59901  16.985  < 2e-16 ***
## ns(RIDAGEYR, df = 7)6 11.29792    1.04356  10.826  < 2e-16 ***
## ns(RIDAGEYR, df = 7)7 11.02205    0.63264  17.422  < 2e-16 ***
## BMXWT                  0.71904    0.00432 166.448  < 2e-16 ***
## RIAGENDRMale          -3.99138    0.17704 -22.545  < 2e-16 ***
## years2005-2006        -0.23504    0.36541  -0.643 0.520112    
## years2007-2008         0.02642    0.34687   0.076 0.939282    
## years2009-2010        -0.40802    0.34011  -1.200 0.230321    
## years2013-2014         0.17293    0.34745   0.498 0.618711    
## years2015-2016         0.82748    0.35197   2.351 0.018761 *  
## years2017-2018         0.71602    0.35612   2.011 0.044420 *  
## years2022-2012         0.04735    0.36196   0.131 0.895928    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.032 on 4983 degrees of freedom
## Multiple R-squared:  0.8567, Adjusted R-squared:  0.8563 
## F-statistic:  1862 on 16 and 4983 DF,  p-value: < 2.2e-16

# Spline terms for both RIDAGEYR and weight, plus sex and linear BMXHT;
# `years` is left out.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.6852  -3.1909   0.0366   3.2605  21.9850 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           129.48065    1.79228  72.244  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  -0.52947    0.78626  -0.673   0.5007    
## bs(RIDAGEYR, df = 7)2   0.61028    0.57631   1.059   0.2897    
## bs(RIDAGEYR, df = 7)3   1.26110    0.62853   2.006   0.0449 *  
## bs(RIDAGEYR, df = 7)4   3.82980    0.54493   7.028 2.38e-12 ***
## bs(RIDAGEYR, df = 7)5   6.49053    0.70960   9.147  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   8.88156    0.77303  11.489  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7   8.80371    0.85725  10.270  < 2e-16 ***
## bs(BMXWT)1             65.41056    1.65931  39.420  < 2e-16 ***
## bs(BMXWT)2             96.00156    1.83515  52.313  < 2e-16 ***
## bs(BMXWT)3            126.77400    3.15875  40.134  < 2e-16 ***
## RIAGENDRMale            0.94313    0.20018   4.711 2.53e-06 ***
## BMXHT                  -0.45352    0.01091 -41.569  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.103 on 4987 degrees of freedom
## Multiple R-squared:  0.8974, Adjusted R-squared:  0.8971 
## F-statistic:  3634 on 12 and 4987 DF,  p-value: < 2.2e-16

# RIDAGEYR spline with linear weight and BMXHT, plus sex and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.6655  -3.2828  -0.0158   3.3540  21.9432 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           104.510469   1.821093  57.389  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  -0.503574   0.817543  -0.616  0.53795    
## bs(RIDAGEYR, df = 7)2   1.083962   0.598751   1.810  0.07030 .  
## bs(RIDAGEYR, df = 7)3   1.543654   0.653760   2.361  0.01825 *  
## bs(RIDAGEYR, df = 7)4   4.341085   0.567124   7.655 2.31e-14 ***
## bs(RIDAGEYR, df = 7)5   6.867314   0.739633   9.285  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   9.572545   0.806930  11.863  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7   8.366009   0.901531   9.280  < 2e-16 ***
## BMXWT                   0.779610   0.004117 189.382  < 2e-16 ***
## RIAGENDRMale            1.260216   0.207849   6.063 1.43e-09 ***
## BMXHT                  -0.433638   0.011356 -38.186  < 2e-16 ***
## years2005-2006         -0.469043   0.321990  -1.457  0.14526    
## years2007-2008         -0.300321   0.306935  -0.978  0.32790    
## years2009-2010         -0.940394   0.301292  -3.121  0.00181 ** 
## years2013-2014         -0.231794   0.307387  -0.754  0.45084    
## years2015-2016         -0.048571   0.312025  -0.156  0.87630    
## years2017-2018         -0.302210   0.315578  -0.958  0.33829    
## years2022-2012         -0.298817   0.320001  -0.934  0.35045    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.305 on 4982 degrees of freedom
## Multiple R-squared:  0.8892, Adjusted R-squared:  0.8888 
## F-statistic:  2352 on 17 and 4982 DF,  p-value: < 2.2e-16

# Weight enters through its rank-based inverse normal transform.
# NOTE(review): invNorm() ranks whatever vector it receives, so predict()
# re-ranks weight within the test set rather than reusing the training ranks.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + invNorm(BMXWT) + RIAGENDR + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.5690  -3.5938  -0.1112   3.3492  28.3049 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           171.17821    2.03946  83.933  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  -0.61330    0.86133  -0.712   0.4765    
## bs(RIDAGEYR, df = 7)2   0.07857    0.63111   0.124   0.9009    
## bs(RIDAGEYR, df = 7)3   0.91193    0.68897   1.324   0.1857    
## bs(RIDAGEYR, df = 7)4   3.42366    0.59781   5.727 1.08e-08 ***
## bs(RIDAGEYR, df = 7)5   5.97717    0.77956   7.667 2.10e-14 ***
## bs(RIDAGEYR, df = 7)6   8.21610    0.85031   9.662  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7   8.89862    0.94985   9.368  < 2e-16 ***
## invNorm(BMXWT)         16.31658    0.09147 178.379  < 2e-16 ***
## RIAGENDRMale            0.38862    0.21868   1.777   0.0756 .  
## BMXHT                  -0.44783    0.01201 -37.292  < 2e-16 ***
## years2005-2006         -0.16327    0.33920  -0.481   0.6303    
## years2007-2008         -0.12564    0.32335  -0.389   0.6976    
## years2009-2010         -0.41993    0.31736  -1.323   0.1858    
## years2013-2014         -0.01626    0.32381  -0.050   0.9600    
## years2015-2016          0.30453    0.32866   0.927   0.3542    
## years2017-2018          0.01491    0.33238   0.045   0.9642    
## years2022-2012         -0.10749    0.33713  -0.319   0.7499    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.59 on 4982 degrees of freedom
## Multiple R-squared:  0.877,  Adjusted R-squared:  0.8766 
## F-statistic:  2090 on 17 and 4982 DF,  p-value: < 2.2e-16

# BMXHT enters through its rank-based inverse normal transform; weight stays
# linear.
# NOTE(review): invNorm() ranks whatever vector it receives, so predict()
# re-ranks BMXHT within the test set rather than reusing the training ranks.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + invNorm(BMXHT) + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.4860  -3.3002   0.0119   3.3333  22.1949 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           32.241117   0.578546  55.728  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.432309   0.819343  -0.528  0.59778    
## bs(RIDAGEYR, df = 7)2  1.141647   0.600054   1.903  0.05715 .  
## bs(RIDAGEYR, df = 7)3  1.644574   0.655164   2.510  0.01210 *  
## bs(RIDAGEYR, df = 7)4  4.424307   0.568311   7.785 8.42e-15 ***
## bs(RIDAGEYR, df = 7)5  6.903075   0.741236   9.313  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6  9.699799   0.808505  11.997  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7  8.407813   0.903488   9.306  < 2e-16 ***
## BMXWT                  0.778097   0.004115 189.085  < 2e-16 ***
## RIAGENDRMale           1.030185   0.205016   5.025 5.21e-07 ***
## invNorm(BMXHT)        -4.248985   0.112371 -37.812  < 2e-16 ***
## years2005-2006        -0.483375   0.322711  -1.498  0.13423    
## years2007-2008        -0.310190   0.307622  -1.008  0.31334    
## years2009-2010        -0.917674   0.301939  -3.039  0.00238 ** 
## years2013-2014        -0.232762   0.308071  -0.756  0.44996    
## years2015-2016        -0.046285   0.312724  -0.148  0.88234    
## years2017-2018        -0.300068   0.316288  -0.949  0.34281    
## years2022-2012        -0.295770   0.320710  -0.922  0.35645    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.317 on 4982 degrees of freedom
## Multiple R-squared:  0.8887, Adjusted R-squared:  0.8883 
## F-statistic:  2340 on 17 and 4982 DF,  p-value: < 2.2e-16

# Splines for RIDAGEYR (df = 7) and weight (bs() defaults), plus sex, linear
# BMXHT and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.6964  -3.2159   0.0507   3.2231  22.1043 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           129.76775    1.82364  71.159  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  -0.54769    0.78600  -0.697  0.48596    
## bs(RIDAGEYR, df = 7)2   0.64196    0.57620   1.114  0.26528    
## bs(RIDAGEYR, df = 7)3   1.23408    0.62870   1.963  0.04971 *  
## bs(RIDAGEYR, df = 7)4   3.87270    0.54571   7.097 1.46e-12 ***
## bs(RIDAGEYR, df = 7)5   6.42949    0.71140   9.038  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   8.97711    0.77641  11.562  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7   8.67049    0.86683  10.003  < 2e-16 ***
## bs(BMXWT)1             65.27609    1.65983  39.327  < 2e-16 ***
## bs(BMXWT)2             96.09541    1.83650  52.325  < 2e-16 ***
## bs(BMXWT)3            126.68545    3.15856  40.109  < 2e-16 ***
## RIAGENDRMale            0.95514    0.20049   4.764 1.95e-06 ***
## BMXHT                  -0.45335    0.01096 -41.362  < 2e-16 ***
## years2005-2006         -0.39003    0.30957  -1.260  0.20777    
## years2007-2008         -0.28987    0.29509  -0.982  0.32599    
## years2009-2010         -0.79705    0.28982  -2.750  0.00598 ** 
## years2013-2014         -0.18996    0.29552  -0.643  0.52037    
## years2015-2016          0.03255    0.30005   0.108  0.91362    
## years2017-2018         -0.26765    0.30344  -0.882  0.37780    
## years2022-2012         -0.25484    0.30768  -0.828  0.40756    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.1 on 4980 degrees of freedom
## Multiple R-squared:  0.8976, Adjusted R-squared:  0.8972 
## F-statistic:  2298 on 19 and 4980 DF,  p-value: < 2.2e-16

# 7-df B-splines for RIDAGEYR, weight and BMXHT, plus sex and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT,df=7) + RIAGENDR + bs(BMXHT,df=7) + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.9407  -3.1450   0.0111   3.2425  22.0717 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            69.20583    4.09863  16.885  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  -0.51970    0.78663  -0.661  0.50886    
## bs(RIDAGEYR, df = 7)2   0.57466    0.57704   0.996  0.31936    
## bs(RIDAGEYR, df = 7)3   1.22967    0.62912   1.955  0.05069 .  
## bs(RIDAGEYR, df = 7)4   3.86854    0.54646   7.079 1.65e-12 ***
## bs(RIDAGEYR, df = 7)5   6.39737    0.71200   8.985  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   9.00941    0.77700  11.595  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7   8.69250    0.86743  10.021  < 2e-16 ***
## bs(BMXWT, df = 7)1     13.26019    3.18959   4.157 3.27e-05 ***
## bs(BMXWT, df = 7)2     24.68206    2.01210  12.267  < 2e-16 ***
## bs(BMXWT, df = 7)3     38.98781    2.32172  16.793  < 2e-16 ***
## bs(BMXWT, df = 7)4     47.84237    2.21391  21.610  < 2e-16 ***
## bs(BMXWT, df = 7)5     85.68573    2.49841  34.296  < 2e-16 ***
## bs(BMXWT, df = 7)6    109.51976    3.27675  33.423  < 2e-16 ***
## bs(BMXWT, df = 7)7    127.87759    4.51005  28.354  < 2e-16 ***
## RIAGENDRMale            0.90811    0.20772   4.372 1.26e-05 ***
## bs(BMXHT, df = 7)1     -7.08407    4.95934  -1.428  0.15323    
## bs(BMXHT, df = 7)2    -11.08331    3.45580  -3.207  0.00135 ** 
## bs(BMXHT, df = 7)3    -15.99038    3.79332  -4.215 2.54e-05 ***
## bs(BMXHT, df = 7)4    -18.27354    3.68766  -4.955 7.46e-07 ***
## bs(BMXHT, df = 7)5    -26.10880    3.85155  -6.779 1.35e-11 ***
## bs(BMXHT, df = 7)6    -27.67786    3.95822  -6.993 3.06e-12 ***
## bs(BMXHT, df = 7)7    -41.77758    4.74141  -8.811  < 2e-16 ***
## years2005-2006         -0.36559    0.30980  -1.180  0.23802    
## years2007-2008         -0.25477    0.29523  -0.863  0.38821    
## years2009-2010         -0.76609    0.29000  -2.642  0.00828 ** 
## years2013-2014         -0.16200    0.29565  -0.548  0.58376    
## years2015-2016          0.06028    0.30005   0.201  0.84080    
## years2017-2018         -0.24868    0.30352  -0.819  0.41265    
## years2022-2012         -0.23286    0.30778  -0.757  0.44935    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.098 on 4970 degrees of freedom
## Multiple R-squared:  0.8979, Adjusted R-squared:  0.8973 
## F-statistic:  1507 on 29 and 4970 DF,  p-value: < 2.2e-16

# Let the RIDAGEYR spline differ by sex: `*` expands to both main effects
# plus their interaction.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.9159  -3.1634   0.0325   3.2716  20.9161 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        131.07075    1.84352  71.098  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1               -0.69438    1.07827  -0.644 0.519624    
## bs(RIDAGEYR, df = 7)2               -0.44677    0.78556  -0.569 0.569563    
## bs(RIDAGEYR, df = 7)3               -0.63532    0.86754  -0.732 0.464003    
## bs(RIDAGEYR, df = 7)4                1.63863    0.74599   2.197 0.028095 *  
## bs(RIDAGEYR, df = 7)5                4.69405    0.96950   4.842 1.33e-06 ***
## bs(RIDAGEYR, df = 7)6                5.97096    1.05471   5.661 1.59e-08 ***
## bs(RIDAGEYR, df = 7)7                5.78590    1.14313   5.061 4.31e-07 ***
## RIAGENDRMale                        -2.81639    0.83360  -3.379 0.000734 ***
## bs(BMXWT)1                          65.72118    1.64780  39.884  < 2e-16 ***
## bs(BMXWT)2                          95.24128    1.82222  52.267  < 2e-16 ***
## bs(BMXWT)3                         127.73706    3.13011  40.809  < 2e-16 ***
## BMXHT                               -0.45168    0.01086 -41.600  < 2e-16 ***
## years2005-2006                      -0.48964    0.30654  -1.597 0.110261    
## years2007-2008                      -0.24037    0.29202  -0.823 0.410479    
## years2009-2010                      -0.76188    0.28687  -2.656 0.007938 ** 
## years2013-2014                      -0.11919    0.29257  -0.407 0.683729    
## years2015-2016                       0.08282    0.29701   0.279 0.780367    
## years2017-2018                      -0.23332    0.30039  -0.777 0.437354    
## years2022-2012                      -0.22090    0.30462  -0.725 0.468389    
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale   0.89898    1.55932   0.577 0.564288    
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale   2.34148    1.14073   2.053 0.040162 *  
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale   4.26981    1.24393   3.433 0.000603 ***
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale   4.74559    1.07827   4.401 1.10e-05 ***
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale   4.22667    1.40496   3.008 0.002639 ** 
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale   5.96284    1.52926   3.899 9.78e-05 ***
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale   6.61731    1.70352   3.884 0.000104 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.045 on 4973 degrees of freedom
## Multiple R-squared:    0.9,  Adjusted R-squared:  0.8995 
## F-statistic:  1721 on 26 and 4973 DF,  p-value: < 2.2e-16

# NOTE(review): the extra "+ RIAGENDR" is redundant because the `*` term
# already includes the sex main effect. The printed summary is identical to
# the one for the same formula without it.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + RIAGENDR+ BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.9159  -3.1634   0.0325   3.2716  20.9161 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        131.07075    1.84352  71.098  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1               -0.69438    1.07827  -0.644 0.519624    
## bs(RIDAGEYR, df = 7)2               -0.44677    0.78556  -0.569 0.569563    
## bs(RIDAGEYR, df = 7)3               -0.63532    0.86754  -0.732 0.464003    
## bs(RIDAGEYR, df = 7)4                1.63863    0.74599   2.197 0.028095 *  
## bs(RIDAGEYR, df = 7)5                4.69405    0.96950   4.842 1.33e-06 ***
## bs(RIDAGEYR, df = 7)6                5.97096    1.05471   5.661 1.59e-08 ***
## bs(RIDAGEYR, df = 7)7                5.78590    1.14313   5.061 4.31e-07 ***
## RIAGENDRMale                        -2.81639    0.83360  -3.379 0.000734 ***
## bs(BMXWT)1                          65.72118    1.64780  39.884  < 2e-16 ***
## bs(BMXWT)2                          95.24128    1.82222  52.267  < 2e-16 ***
## bs(BMXWT)3                         127.73706    3.13011  40.809  < 2e-16 ***
## BMXHT                               -0.45168    0.01086 -41.600  < 2e-16 ***
## years2005-2006                      -0.48964    0.30654  -1.597 0.110261    
## years2007-2008                      -0.24037    0.29202  -0.823 0.410479    
## years2009-2010                      -0.76188    0.28687  -2.656 0.007938 ** 
## years2013-2014                      -0.11919    0.29257  -0.407 0.683729    
## years2015-2016                       0.08282    0.29701   0.279 0.780367    
## years2017-2018                      -0.23332    0.30039  -0.777 0.437354    
## years2022-2012                      -0.22090    0.30462  -0.725 0.468389    
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale   0.89898    1.55932   0.577 0.564288    
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale   2.34148    1.14073   2.053 0.040162 *  
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale   4.26981    1.24393   3.433 0.000603 ***
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale   4.74559    1.07827   4.401 1.10e-05 ***
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale   4.22667    1.40496   3.008 0.002639 ** 
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale   5.96284    1.52926   3.899 9.78e-05 ***
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale   6.61731    1.70352   3.884 0.000104 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.045 on 4973 degrees of freedom
## Multiple R-squared:    0.9,  Adjusted R-squared:  0.8995 
## F-statistic:  1721 on 26 and 4973 DF,  p-value: < 2.2e-16

# RIDAGEYR-by-sex interaction model without the `years` term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.8866  -3.1477   0.0247   3.2572  20.6844 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        130.83953    1.81340  72.152  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1               -0.67331    1.07829  -0.624 0.532381    
## bs(RIDAGEYR, df = 7)2               -0.47603    0.78570  -0.606 0.544628    
## bs(RIDAGEYR, df = 7)3               -0.57909    0.86737  -0.668 0.504393    
## bs(RIDAGEYR, df = 7)4                1.60258    0.74453   2.152 0.031408 *  
## bs(RIDAGEYR, df = 7)5                4.78568    0.96732   4.947 7.77e-07 ***
## bs(RIDAGEYR, df = 7)6                5.90639    1.05093   5.620 2.01e-08 ***
## bs(RIDAGEYR, df = 7)7                5.90886    1.13554   5.204 2.03e-07 ***
## RIAGENDRMale                        -2.79536    0.83363  -3.353 0.000805 ***
## bs(BMXWT)1                          65.84560    1.64746  39.968  < 2e-16 ***
## bs(BMXWT)2                          95.17391    1.82101  52.264  < 2e-16 ***
## bs(BMXWT)3                         127.80215    3.13060  40.824  < 2e-16 ***
## BMXHT                               -0.45211    0.01081 -41.831  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale   0.87658    1.55938   0.562 0.574052    
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale   2.33369    1.14076   2.046 0.040835 *  
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale   4.20345    1.24400   3.379 0.000733 ***
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale   4.75758    1.07739   4.416 1.03e-05 ***
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale   4.12616    1.40464   2.938 0.003324 ** 
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale   5.96003    1.52891   3.898 9.82e-05 ***
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale   6.54486    1.70422   3.840 0.000124 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.048 on 4980 degrees of freedom
## Multiple R-squared:  0.8997, Adjusted R-squared:  0.8994 
## F-statistic:  2352 on 19 and 4980 DF,  p-value: < 2.2e-16

# grid.arrange(g1, g2,g3, nrow=3)

with BMI

# BMI as the only (linear) predictor.
run_model("BMXWAIST ~ BMXBMI")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.280  -4.613  -0.013   4.764  23.156 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 36.58903    0.44626   81.99   <2e-16 ***
## BMXBMI       2.15998    0.01497  144.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.001 on 4998 degrees of freedom
## Multiple R-squared:  0.8064, Adjusted R-squared:  0.8064 
## F-statistic: 2.082e+04 on 1 and 4998 DF,  p-value: < 2.2e-16

# Weight and BMXHT as separate linear predictors, for comparison with the
# BMI-only fit.
run_model("BMXWAIST ~ BMXWT + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.808  -3.997   0.020   4.072  20.894 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 108.778055   1.476754   73.66   <2e-16 ***
## BMXWT         0.777885   0.004649  167.33   <2e-16 ***
## BMXHT        -0.435119   0.009621  -45.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.084 on 4997 degrees of freedom
## Multiple R-squared:  0.8538, Adjusted R-squared:  0.8538 
## F-statistic: 1.459e+04 on 2 and 4997 DF,  p-value: < 2.2e-16

# Weight, BMXHT and BMI together as linear predictors.
run_model("BMXWAIST ~ BMXWT + BMXHT + BMXBMI")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.238  -3.981  -0.001   4.113  20.670 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 67.24995    6.41356  10.486  < 2e-16 ***
## BMXWT        0.52600    0.03815  13.789  < 2e-16 ***
## BMXHT       -0.18744    0.03845  -4.875 1.12e-06 ***
## BMXBMI       0.71029    0.10677   6.652 3.20e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.058 on 4996 degrees of freedom
## Multiple R-squared:  0.8551, Adjusted R-squared:  0.855 
## F-statistic:  9829 on 3 and 4996 DF,  p-value: < 2.2e-16

# 7-df B-spline in BMI. This run prints a bs() warning about x values beyond
# the boundary knots; presumably raised when predicting test rows whose BMI
# lies outside the training range (TODO confirm).
run_model("BMXWAIST ~ bs(BMXBMI,df=7)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.264  -4.468  -0.052   4.572  25.709 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           69.032      3.675  18.782  < 2e-16 ***
## bs(BMXBMI, df = 7)1   -4.366      5.130  -0.851 0.394805    
## bs(BMXBMI, df = 7)2   12.024      3.399   3.537 0.000408 ***
## bs(BMXBMI, df = 7)3   25.591      3.781   6.769 1.45e-11 ***
## bs(BMXBMI, df = 7)4   34.191      3.655   9.356  < 2e-16 ***
## bs(BMXBMI, df = 7)5   61.130      3.945  15.494  < 2e-16 ***
## bs(BMXBMI, df = 7)6   82.407      4.458  18.486  < 2e-16 ***
## bs(BMXBMI, df = 7)7   92.484      5.182  17.846  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.776 on 4992 degrees of freedom
## Multiple R-squared:  0.8189, Adjusted R-squared:  0.8186 
## F-statistic:  3225 on 7 and 4992 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# BMI spline interacted with sex (main effects plus interaction). The same
# bs() boundary-knot warning is printed for this run.
run_model("BMXWAIST ~ bs(BMXBMI,df=7)*RIAGENDR")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.3432  -4.1309  -0.0958   4.0150  22.6188 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        68.750      4.090  16.810  < 2e-16 ***
## bs(BMXBMI, df = 7)1                -6.653      5.677  -1.172  0.24127    
## bs(BMXBMI, df = 7)2                12.324      3.822   3.225  0.00127 ** 
## bs(BMXBMI, df = 7)3                23.070      4.213   5.476 4.57e-08 ***
## bs(BMXBMI, df = 7)4                31.057      4.076   7.619 3.04e-14 ***
## bs(BMXBMI, df = 7)5                58.816      4.393  13.389  < 2e-16 ***
## bs(BMXBMI, df = 7)6                75.125      4.935  15.223  < 2e-16 ***
## bs(BMXBMI, df = 7)7                92.387      5.432  17.008  < 2e-16 ***
## RIAGENDRMale                       -2.168      6.960  -0.312  0.75543    
## bs(BMXBMI, df = 7)1:RIAGENDRMale    9.247      9.752   0.948  0.34303    
## bs(BMXBMI, df = 7)2:RIAGENDRMale    2.494      6.411   0.389  0.69727    
## bs(BMXBMI, df = 7)3:RIAGENDRMale    7.245      7.158   1.012  0.31151    
## bs(BMXBMI, df = 7)4:RIAGENDRMale    8.070      6.914   1.167  0.24318    
## bs(BMXBMI, df = 7)5:RIAGENDRMale   11.322      7.490   1.512  0.13070    
## bs(BMXBMI, df = 7)6:RIAGENDRMale   26.310      8.647   3.043  0.00236 ** 
## bs(BMXBMI, df = 7)7:RIAGENDRMale   16.355     11.538   1.417  0.15640    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.084 on 4984 degrees of freedom
## Multiple R-squared:  0.8542, Adjusted R-squared:  0.8538 
## F-statistic:  1947 on 15 and 4984 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Adds an additive (non-interacting) B-spline of age, RIDAGEYR, with df = 7
# on top of the gender-specific BMI spline.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.6971  -3.6161  -0.1469   3.5118  23.6668 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       63.1146     3.8170  16.535  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1             -0.4781     0.8694  -0.550  0.58240    
## bs(RIDAGEYR, df = 7)2              0.6127     0.6378   0.961  0.33676    
## bs(RIDAGEYR, df = 7)3              1.2412     0.6948   1.786  0.07409 .  
## bs(RIDAGEYR, df = 7)4              3.4396     0.6027   5.707 1.22e-08 ***
## bs(RIDAGEYR, df = 7)5              5.6256     0.7841   7.175 8.31e-13 ***
## bs(RIDAGEYR, df = 7)6              6.8470     0.8534   8.023 1.27e-15 ***
## bs(RIDAGEYR, df = 7)7              7.2470     0.9469   7.654 2.33e-14 ***
## bs(BMXBMI, df = 7)1               -1.7501     5.2626  -0.333  0.73948    
## bs(BMXBMI, df = 7)2               15.0842     3.5419   4.259 2.09e-05 ***
## bs(BMXBMI, df = 7)3               25.8088     3.9040   6.611 4.22e-11 ***
## bs(BMXBMI, df = 7)4               33.5146     3.7771   8.873  < 2e-16 ***
## bs(BMXBMI, df = 7)5               62.3744     4.0721  15.318  < 2e-16 ***
## bs(BMXBMI, df = 7)6               77.9231     4.5742  17.035  < 2e-16 ***
## bs(BMXBMI, df = 7)7               96.7677     5.0343  19.222  < 2e-16 ***
## RIAGENDRMale                       0.7498     6.4512   0.116  0.90748    
## bs(BMXBMI, df = 7)1:RIAGENDRMale   3.8685     9.0428   0.428  0.66882    
## bs(BMXBMI, df = 7)2:RIAGENDRMale   0.5399     5.9413   0.091  0.92759    
## bs(BMXBMI, df = 7)3:RIAGENDRMale   4.1655     6.6354   0.628  0.53018    
## bs(BMXBMI, df = 7)4:RIAGENDRMale   5.2843     6.4082   0.825  0.40963    
## bs(BMXBMI, df = 7)5:RIAGENDRMale   8.2247     6.9439   1.184  0.23629    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  22.7391     8.0123   2.838  0.00456 ** 
## bs(BMXBMI, df = 7)7:RIAGENDRMale  16.0643    10.7015   1.501  0.13339    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.635 on 4977 degrees of freedom
## Multiple R-squared:  0.8751, Adjusted R-squared:  0.8746 
## F-statistic:  1585 on 22 and 4977 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Variant of the previous model with bs(RIDAGEYR) left at its default
# flexibility (three basis columns in the summary) to compare against the
# df = 7 age spline.
run_model("BMXWAIST ~ bs(RIDAGEYR) + bs(BMXBMI,df=7)*RIAGENDR")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.8047  -3.6006  -0.1526   3.5137  23.5800 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      62.97722    3.80109  16.568  < 2e-16 ***
## bs(RIDAGEYR)1                    -0.08702    0.87789  -0.099  0.92104    
## bs(RIDAGEYR)2                     6.13320    0.65206   9.406  < 2e-16 ***
## bs(RIDAGEYR)3                     7.34483    0.57263  12.827  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.74500    5.25995  -0.332  0.74009    
## bs(BMXBMI, df = 7)2              15.11513    3.53966   4.270 1.99e-05 ***
## bs(BMXBMI, df = 7)3              25.83294    3.90209   6.620 3.96e-11 ***
## bs(BMXBMI, df = 7)4              33.54225    3.77525   8.885  < 2e-16 ***
## bs(BMXBMI, df = 7)5              62.38433    4.07017  15.327  < 2e-16 ***
## bs(BMXBMI, df = 7)6              77.98325    4.57071  17.062  < 2e-16 ***
## bs(BMXBMI, df = 7)7              96.78097    5.03224  19.232  < 2e-16 ***
## RIAGENDRMale                      0.61459    6.44595   0.095  0.92404    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  4.09644    9.03350   0.453  0.65023    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  0.63747    5.93654   0.107  0.91449    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  4.30260    6.63011   0.649  0.51640    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  5.41220    6.40329   0.845  0.39803    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  8.38355    6.93762   1.208  0.22694    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 22.78357    8.00740   2.845  0.00445 ** 
## bs(BMXBMI, df = 7)7:RIAGENDRMale 16.29167   10.68503   1.525  0.12739    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.633 on 4981 degrees of freedom
## Multiple R-squared:  0.8751, Adjusted R-squared:  0.8746 
## F-statistic:  1939 on 18 and 4981 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Age spline (df = 7) + gender-specific BMI spline, plus BMXHT as a plain
# linear term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.8562  -3.2529  -0.0377   3.2791  21.4987 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       5.78012    3.87207   1.493  0.13556    
## bs(RIDAGEYR, df = 7)1            -0.49713    0.78783  -0.631  0.52807    
## bs(RIDAGEYR, df = 7)2             0.59545    0.57793   1.030  0.30291    
## bs(RIDAGEYR, df = 7)3             1.20719    0.62963   1.917  0.05526 .  
## bs(RIDAGEYR, df = 7)4             3.78504    0.54624   6.929 4.77e-12 ***
## bs(RIDAGEYR, df = 7)5             6.40054    0.71089   9.004  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.82192    0.77564  11.374  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.95609    0.85960  10.419  < 2e-16 ***
## bs(BMXBMI, df = 7)1               3.37323    4.77134   0.707  0.47961    
## bs(BMXBMI, df = 7)2              17.72547    3.21050   5.521 3.54e-08 ***
## bs(BMXBMI, df = 7)3              29.65608    3.53964   8.378  < 2e-16 ***
## bs(BMXBMI, df = 7)4              37.29638    3.42463  10.891  < 2e-16 ***
## bs(BMXBMI, df = 7)5              65.79094    3.69142  17.823  < 2e-16 ***
## bs(BMXBMI, df = 7)6              82.09602    4.14691  19.797  < 2e-16 ***
## bs(BMXBMI, df = 7)7              98.23367    4.56211  21.533  < 2e-16 ***
## RIAGENDRMale                     -1.83410    5.84637  -0.314  0.75375    
## BMXHT                             0.33234    0.01009  32.941  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale  2.45790    8.19433   0.300  0.76423    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.85085    5.38398  -0.158  0.87444    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  2.16509    6.01304   0.360  0.71881    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  3.12682    5.80723   0.538  0.59030    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  6.20527    6.29258   0.986  0.32412    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 18.84517    7.26143   2.595  0.00948 ** 
## bs(BMXBMI, df = 7)7:RIAGENDRMale 12.65650    9.69784   1.305  0.19192    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.106 on 4976 degrees of freedom
## Multiple R-squared:  0.8975, Adjusted R-squared:  0.897 
## F-statistic:  1894 on 23 and 4976 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Same as the previous model but with BMXHT entered through a default
# bs() spline (three basis columns) instead of a single linear term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXHT)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.0082  -3.2273  -0.0345   3.2790  21.4527 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       50.2740     3.7142  13.536  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1             -0.4773     0.7873  -0.606  0.54440    
## bs(RIDAGEYR, df = 7)2              0.5362     0.5778   0.928  0.35350    
## bs(RIDAGEYR, df = 7)3              1.2016     0.6294   1.909  0.05630 .  
## bs(RIDAGEYR, df = 7)4              3.7778     0.5459   6.920 5.08e-12 ***
## bs(RIDAGEYR, df = 7)5              6.3721     0.7104   8.969  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              8.8152     0.7752  11.371  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7              8.9829     0.8590  10.457  < 2e-16 ***
## bs(BMXBMI, df = 7)1                3.6833     4.7689   0.772  0.43993    
## bs(BMXBMI, df = 7)2               17.9075     3.2087   5.581 2.52e-08 ***
## bs(BMXBMI, df = 7)3               29.9117     3.5381   8.454  < 2e-16 ***
## bs(BMXBMI, df = 7)4               37.5515     3.4231  10.970  < 2e-16 ***
## bs(BMXBMI, df = 7)5               66.0049     3.6894  17.890  < 2e-16 ***
## bs(BMXBMI, df = 7)6               82.3773     4.1449  19.874  < 2e-16 ***
## bs(BMXBMI, df = 7)7               98.2841     4.5588  21.559  < 2e-16 ***
## RIAGENDRMale                      -1.9749     5.8433  -0.338  0.73540    
## bs(BMXHT)1                         6.2055     3.0064   2.064  0.03906 *  
## bs(BMXHT)2                        18.8865     1.2254  15.413  < 2e-16 ***
## bs(BMXHT)3                        19.4071     2.6173   7.415 1.42e-13 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale   2.4877     8.1889   0.304  0.76130    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -0.6946     5.3808  -0.129  0.89729    
## bs(BMXBMI, df = 7)3:RIAGENDRMale   2.1814     6.0090   0.363  0.71661    
## bs(BMXBMI, df = 7)4:RIAGENDRMale   3.1559     5.8034   0.544  0.58660    
## bs(BMXBMI, df = 7)5:RIAGENDRMale   6.2933     6.2882   1.001  0.31697    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  19.0439     7.2574   2.624  0.00872 ** 
## bs(BMXBMI, df = 7)7:RIAGENDRMale  13.1318     9.6920   1.355  0.17551    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.103 on 4974 degrees of freedom
## Multiple R-squared:  0.8977, Adjusted R-squared:  0.8972 
## F-statistic:  1745 on 25 and 4974 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Age spline (df = 7) + gender-specific BMI spline, plus BMXWT as a plain
# linear term (weight instead of height).
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4531  -3.2040   0.0178   3.2398  21.5454 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.88262    3.46399  14.112  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1            -0.53179    0.78323  -0.679  0.49718    
## bs(RIDAGEYR, df = 7)2             0.60611    0.57455   1.055  0.29150    
## bs(RIDAGEYR, df = 7)3             1.10998    0.62595   1.773  0.07625 .  
## bs(RIDAGEYR, df = 7)4             3.80219    0.54305   7.002 2.87e-12 ***
## bs(RIDAGEYR, df = 7)5             6.36138    0.70668   9.002  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.81049    0.77097  11.428  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.67978    0.85405  10.163  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.63954    4.74092  -0.346  0.72949    
## bs(BMXBMI, df = 7)2               9.63609    3.19475   3.016  0.00257 ** 
## bs(BMXBMI, df = 7)3              16.44795    3.52778   4.662 3.21e-06 ***
## bs(BMXBMI, df = 7)4              21.27040    3.42168   6.216 5.50e-10 ***
## bs(BMXBMI, df = 7)5              37.43621    3.74096  10.007  < 2e-16 ***
## bs(BMXBMI, df = 7)6              43.30599    4.24458  10.203  < 2e-16 ***
## bs(BMXBMI, df = 7)7              46.22252    4.77252   9.685  < 2e-16 ***
## RIAGENDRMale                     -0.55529    5.81179  -0.096  0.92389    
## BMXWT                             0.34107    0.01003  34.010  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale  2.98931    8.14634   0.367  0.71367    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.11226    5.35255  -0.208  0.83539    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.34971    5.97815   0.226  0.82139    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.68279    5.77388   0.291  0.77072    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  2.76426    6.25755   0.442  0.65869    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.91382    7.22503   1.649  0.09922 .  
## bs(BMXBMI, df = 7)7:RIAGENDRMale  2.36852    9.64899   0.245  0.80610    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.077 on 4976 degrees of freedom
## Multiple R-squared:  0.8987, Adjusted R-squared:  0.8982 
## F-statistic:  1919 on 23 and 4976 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Both BMXWT and BMXHT entered through default bs() splines alongside the
# age and gender-specific BMI splines.
# NOTE(review): the standard errors of the BMI spline terms are much larger
# in this fit than in the neighbouring models -- presumably collinearity
# between BMXBMI, BMXWT and BMXHT; confirm before interpreting coefficients.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXWT) + bs(BMXHT)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.7818  -3.2043  -0.0148   3.2379  21.4550 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       52.6336    12.3622   4.258  2.1e-05 ***
## bs(RIDAGEYR, df = 7)1             -0.5191     0.7824  -0.663   0.5071    
## bs(RIDAGEYR, df = 7)2              0.5124     0.5744   0.892   0.3723    
## bs(RIDAGEYR, df = 7)3              1.0835     0.6258   1.732   0.0834 .  
## bs(RIDAGEYR, df = 7)4              3.7774     0.5426   6.962  3.8e-12 ***
## bs(RIDAGEYR, df = 7)5              6.3173     0.7059   8.949  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              8.7881     0.7703  11.408  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7              8.6950     0.8542  10.179  < 2e-16 ***
## bs(BMXBMI, df = 7)1                3.1605     8.1146   0.389   0.6969    
## bs(BMXBMI, df = 7)2               18.5216    13.1601   1.407   0.1594    
## bs(BMXBMI, df = 7)3               29.6260    19.4960   1.520   0.1287    
## bs(BMXBMI, df = 7)4               36.8089    23.0453   1.597   0.1103    
## bs(BMXBMI, df = 7)5               61.2786    36.1213   1.696   0.0899 .  
## bs(BMXBMI, df = 7)6               69.8472    41.2720   1.692   0.0906 .  
## bs(BMXBMI, df = 7)7               76.9340    47.8398   1.608   0.1079    
## RIAGENDRMale                      -0.8923     5.8260  -0.153   0.8783    
## bs(BMXWT)1                        -6.7810    39.8696  -0.170   0.8650    
## bs(BMXWT)2                        19.2442    37.7508   0.510   0.6102    
## bs(BMXWT)3                        24.6989    56.9119   0.434   0.6643    
## bs(BMXHT)1                         5.7762    10.3515   0.558   0.5769    
## bs(BMXHT)2                        16.2061    18.0580   0.897   0.3695    
## bs(BMXHT)3                        11.6001    24.9546   0.465   0.6421    
## bs(BMXBMI, df = 7)1:RIAGENDRMale   3.1021     8.1369   0.381   0.7030    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -0.6639     5.3629  -0.124   0.9015    
## bs(BMXBMI, df = 7)3:RIAGENDRMale   1.6340     5.9965   0.272   0.7853    
## bs(BMXBMI, df = 7)4:RIAGENDRMale   1.8860     5.7944   0.325   0.7448    
## bs(BMXBMI, df = 7)5:RIAGENDRMale   2.8524     6.2825   0.454   0.6498    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  10.6235     7.3949   1.437   0.1509    
## bs(BMXBMI, df = 7)7:RIAGENDRMale   8.7937    12.9829   0.677   0.4982    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.07 on 4971 degrees of freedom
## Multiple R-squared:  0.899,  Adjusted R-squared:  0.8985 
## F-statistic:  1581 on 28 and 4971 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Linear BMXWT and BMXHT plus `years`, which enters as a categorical term
# (one coefficient per level in the summary).
# NOTE(review): one level is printed as "2022-2012", which looks like a
# mislabelled value in the data (possibly 2011-2012) -- confirm upstream.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4946  -3.2323   0.0243   3.2239  21.6942 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.784045   6.772695   7.203 6.77e-13 ***
## bs(RIDAGEYR, df = 7)1            -0.551260   0.782943  -0.704  0.48141    
## bs(RIDAGEYR, df = 7)2             0.637920   0.574395   1.111  0.26680    
## bs(RIDAGEYR, df = 7)3             1.084544   0.626184   1.732  0.08334 .  
## bs(RIDAGEYR, df = 7)4             3.840357   0.543774   7.062 1.86e-12 ***
## bs(RIDAGEYR, df = 7)5             6.300188   0.708492   8.892  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.903066   0.774289  11.498  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.559773   0.864535   9.901  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.433239   4.788154  -0.299  0.76470    
## bs(BMXBMI, df = 7)2               9.667675   3.357950   2.879  0.00401 ** 
## bs(BMXBMI, df = 7)3              16.586470   3.910158   4.242 2.26e-05 ***
## bs(BMXBMI, df = 7)4              21.393432   3.984290   5.369 8.26e-08 ***
## bs(BMXBMI, df = 7)5              37.616719   5.184384   7.256 4.61e-13 ***
## bs(BMXBMI, df = 7)6              43.599449   6.487478   6.721 2.01e-11 ***
## bs(BMXBMI, df = 7)7              46.445105   8.107393   5.729 1.07e-08 ***
## RIAGENDRMale                     -0.449955   5.814097  -0.077  0.93832    
## BMXWT                             0.339038   0.044051   7.696 1.68e-14 ***
## BMXHT                             0.002447   0.044061   0.056  0.95571    
## years2005-2006                   -0.411412   0.308202  -1.335  0.18198    
## years2007-2008                   -0.322565   0.293889  -1.098  0.27244    
## years2009-2010                   -0.821936   0.288420  -2.850  0.00439 ** 
## years2013-2014                   -0.164556   0.294160  -0.559  0.57591    
## years2015-2016                    0.054766   0.298569   0.183  0.85447    
## years2017-2018                   -0.210854   0.302186  -0.698  0.48536    
## years2022-2012                   -0.220935   0.306265  -0.721  0.47071    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  2.750295   8.146386   0.338  0.73567    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.200781   5.352091  -0.224  0.82249    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.226864   5.977941   0.205  0.83740    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.587317   5.775753   0.275  0.78346    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  2.764122   6.270469   0.441  0.65937    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.775246   7.272313   1.619  0.10547    
## bs(BMXBMI, df = 7)7:RIAGENDRMale  2.670857   9.731143   0.274  0.78374    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.074 on 4968 degrees of freedom
## Multiple R-squared:  0.899,  Adjusted R-squared:  0.8983 
## F-statistic:  1426 on 31 and 4968 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Base model: age spline (df = 7), gender-specific BMI spline (df = 7), and
# linear BMXWT and BMXHT. The later single-variable models extend this same
# right-hand side by one extra term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4478  -3.2060   0.0143   3.2382  21.5451 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.422415   6.767358   7.155 9.56e-13 ***
## bs(RIDAGEYR, df = 7)1            -0.531457   0.783318  -0.678  0.49751    
## bs(RIDAGEYR, df = 7)2             0.605997   0.574605   1.055  0.29165    
## bs(RIDAGEYR, df = 7)3             1.110928   0.626132   1.774  0.07608 .  
## bs(RIDAGEYR, df = 7)4             3.802206   0.543100   7.001 2.88e-12 ***
## bs(RIDAGEYR, df = 7)5             6.362188   0.706825   9.001  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.811666   0.771186  11.426  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.683451   0.855397  10.151  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.586848   4.787880  -0.331  0.74033    
## bs(BMXBMI, df = 7)2               9.718095   3.358780   2.893  0.00383 ** 
## bs(BMXBMI, df = 7)3              16.581586   3.911192   4.240 2.28e-05 ***
## bs(BMXBMI, df = 7)4              21.432070   3.985095   5.378 7.87e-08 ***
## bs(BMXBMI, df = 7)5              37.720486   5.185866   7.274 4.04e-13 ***
## bs(BMXBMI, df = 7)6              43.694622   6.490055   6.733 1.86e-11 ***
## bs(BMXBMI, df = 7)7              46.741394   8.108147   5.765 8.67e-09 ***
## RIAGENDRMale                     -0.569421   5.815111  -0.098  0.92200    
## BMXWT                             0.337670   0.044072   7.662 2.19e-14 ***
## BMXHT                             0.003489   0.044078   0.079  0.93691    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  2.983259   8.147514   0.366  0.71426    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.110409   5.353139  -0.207  0.83568    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.356751   5.979405   0.227  0.82051    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.696008   5.776870   0.294  0.76909    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  2.797453   6.272203   0.446  0.65561    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.980762   7.275066   1.647  0.09966 .  
## bs(BMXBMI, df = 7)7:RIAGENDRMale  2.469160   9.733345   0.254  0.79975    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.077 on 4975 degrees of freedom
## Multiple R-squared:  0.8987, Adjusted R-squared:  0.8982 
## F-statistic:  1838 on 24 and 4975 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# grid.arrange(g1, g2,g3, nrow=3)

Regression models with additional variables (each added to the base model one at a time)

# Formula prefix shared by the models that follow. It is the base model's
# right-hand side with a trailing " + ", so it is NOT a valid formula on its
# own: exactly one extra term must be appended with paste0().
base_form <- "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + "

# MFA 16:1 (Hexadecenoic) (gm), added as a linear term on the raw scale.
run_model(paste0(base_form,"DR1TM161"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.2867  -3.2198   0.0061   3.2529  21.5415 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.587258   6.767025   7.180 8.00e-13 ***
## bs(RIDAGEYR, df = 7)1            -0.508330   0.783321  -0.649  0.51641    
## bs(RIDAGEYR, df = 7)2             0.602174   0.574517   1.048  0.29463    
## bs(RIDAGEYR, df = 7)3             1.111861   0.626031   1.776  0.07579 .  
## bs(RIDAGEYR, df = 7)4             3.795480   0.543028   6.989 3.12e-12 ***
## bs(RIDAGEYR, df = 7)5             6.315843   0.707290   8.930  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.768631   0.771519  11.365  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.644418   0.855599  10.103  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.693851   4.787559  -0.354  0.72350    
## bs(BMXBMI, df = 7)2               9.612337   3.358871   2.862  0.00423 ** 
## bs(BMXBMI, df = 7)3              16.453496   3.911358   4.207 2.64e-05 ***
## bs(BMXBMI, df = 7)4              21.321622   3.985032   5.350 9.17e-08 ***
## bs(BMXBMI, df = 7)5              37.561334   5.185957   7.243 5.07e-13 ***
## bs(BMXBMI, df = 7)6              43.526103   6.489837   6.707 2.21e-11 ***
## bs(BMXBMI, df = 7)7              46.480074   8.108438   5.732 1.05e-08 ***
## RIAGENDRMale                     -0.725807   5.814969  -0.125  0.90067    
## BMXWT                             0.338758   0.044070   7.687 1.81e-14 ***
## BMXHT                             0.003619   0.044071   0.082  0.93456    
## DR1TM161                         -0.137338   0.084893  -1.618  0.10577    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  3.251670   8.147879   0.399  0.68985    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.938861   5.353319  -0.175  0.86079    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.570757   5.979897   0.263  0.79281    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.873999   5.776979   0.324  0.74566    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  3.075553   6.273539   0.490  0.62398    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 12.024983   7.273936   1.653  0.09836 .  
## bs(BMXBMI, df = 7)7:RIAGENDRMale  2.975277   9.736791   0.306  0.75994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.076 on 4974 degrees of freedom
## Multiple R-squared:  0.8987, Adjusted R-squared:  0.8982 
## F-statistic:  1766 on 25 and 4974 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Distribution of DR1TM161 on its raw scale, over the full `data` frame
# (not only the training rows).
hist(data$DR1TM161)

# Same variable after invNorm(), the rank-based transform defined near the
# top of the file: qnorm((rank(x) - 3/8) / (length(x) + 1 - 6/8)).
hist(invNorm(data$DR1TM161))

# Base model plus the transformed variable. Because invNorm() depends on
# rank(x) and length(x), the transform is computed relative to whichever
# data frame the formula is evaluated on, so it differs from the full-data
# histogram above.
# NOTE(review): if run_model() also predicts on a separate data set, the
# transform would be re-ranked within that set -- confirm this is intended.
run_model(paste0(base_form,"invNorm(DR1TM161)"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.2578  -3.2162   0.0063   3.2431  21.5752 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.408101   6.765339   7.155 9.56e-13 ***
## bs(RIDAGEYR, df = 7)1            -0.491274   0.783343  -0.627  0.53059    
## bs(RIDAGEYR, df = 7)2             0.593253   0.574469   1.033  0.30180    
## bs(RIDAGEYR, df = 7)3             1.121506   0.625968   1.792  0.07325 .  
## bs(RIDAGEYR, df = 7)4             3.795222   0.542949   6.990 3.11e-12 ***
## bs(RIDAGEYR, df = 7)5             6.319020   0.706945   8.938  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.760815   0.771377  11.357  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.634479   0.855494  10.093  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.817780   4.787851  -0.380  0.70421    
## bs(BMXBMI, df = 7)2               9.554625   3.358778   2.845  0.00446 ** 
## bs(BMXBMI, df = 7)3              16.384344   3.911275   4.189 2.85e-05 ***
## bs(BMXBMI, df = 7)4              21.262094   3.984816   5.336 9.94e-08 ***
## bs(BMXBMI, df = 7)5              37.495251   5.185547   7.231 5.54e-13 ***
## bs(BMXBMI, df = 7)6              43.484731   6.488970   6.701 2.29e-11 ***
## bs(BMXBMI, df = 7)7              46.382756   8.107720   5.721 1.12e-08 ***
## RIAGENDRMale                     -0.781669   5.814348  -0.134  0.89306    
## BMXWT                             0.338766   0.044063   7.688 1.79e-14 ***
## BMXHT                             0.004078   0.044066   0.093  0.92627    
## invNorm(DR1TM161)                -0.150832   0.075659  -1.994  0.04625 *  
## bs(BMXBMI, df = 7)1:RIAGENDRMale  3.324562   8.146878   0.408  0.68323    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.885211   5.352731  -0.165  0.86866    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.635948   5.979259   0.274  0.78440    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.933776   5.776375   0.335  0.73781    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  3.151327   6.272841   0.502  0.61543    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 12.062262   7.273007   1.658  0.09728 .  
## bs(BMXBMI, df = 7)7:RIAGENDRMale  3.055050   9.734874   0.314  0.75367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.076 on 4974 degrees of freedom
## Multiple R-squared:  0.8988, Adjusted R-squared:  0.8982 
## F-statistic:  1766 on 25 and 4974 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# WTDRD1 - Dietary day one sample weight, appended to the base model as an
# ordinary linear covariate.
# NOTE(review): this is a survey sample weight used as a predictor, not as
# lm() weights -- confirm that is the intent.
run_model(paste0(base_form,"WTDRD1"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4547  -3.2044   0.0127   3.2389  21.5341 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.835e+01  6.783e+00   7.127 1.17e-12 ***
## bs(RIDAGEYR, df = 7)1            -5.295e-01  7.835e-01  -0.676  0.49922    
## bs(RIDAGEYR, df = 7)2             6.067e-01  5.747e-01   1.056  0.29117    
## bs(RIDAGEYR, df = 7)3             1.111e+00  6.262e-01   1.774  0.07620 .  
## bs(RIDAGEYR, df = 7)4             3.806e+00  5.438e-01   7.000 2.90e-12 ***
## bs(RIDAGEYR, df = 7)5             6.357e+00  7.076e-01   8.984  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.813e+00  7.713e-01  11.426  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.678e+00  8.561e-01  10.136  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.568e+00  4.790e+00  -0.327  0.74348    
## bs(BMXBMI, df = 7)2               9.731e+00  3.360e+00   2.896  0.00379 ** 
## bs(BMXBMI, df = 7)3               1.660e+01  3.913e+00   4.242 2.25e-05 ***
## bs(BMXBMI, df = 7)4               2.145e+01  3.987e+00   5.380 7.79e-08 ***
## bs(BMXBMI, df = 7)5               3.775e+01  5.190e+00   7.274 4.03e-13 ***
## bs(BMXBMI, df = 7)6               4.372e+01  6.493e+00   6.734 1.84e-11 ***
## bs(BMXBMI, df = 7)7               4.678e+01  8.113e+00   5.767 8.58e-09 ***
## RIAGENDRMale                     -5.748e-01  5.816e+00  -0.099  0.92127    
## BMXWT                             3.374e-01  4.410e-02   7.652 2.37e-14 ***
## BMXHT                             4.018e-03  4.420e-02   0.091  0.92757    
## WTDRD1                           -2.557e-07  1.552e-06  -0.165  0.86918    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  2.980e+00  8.148e+00   0.366  0.71461    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.113e+00  5.354e+00  -0.208  0.83533    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.358e+00  5.980e+00   0.227  0.82037    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.697e+00  5.777e+00   0.294  0.76896    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  2.801e+00  6.273e+00   0.446  0.65529    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  1.199e+01  7.276e+00   1.648  0.09945 .  
## bs(BMXBMI, df = 7)7:RIAGENDRMale  2.478e+00  9.734e+00   0.255  0.79909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.078 on 4974 degrees of freedom
## Multiple R-squared:  0.8987, Adjusted R-squared:  0.8982 
## F-statistic:  1765 on 25 and 4974 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# BMXARML - Upper Arm Length (cm), appended to the base model as a linear
# term.
run_model(paste0(base_form,"BMXARML"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4378  -3.2012   0.0298   3.2378  21.5431 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.76081    6.76760   7.205 6.67e-13 ***
## bs(RIDAGEYR, df = 7)1            -0.57038    0.78334  -0.728  0.46657    
## bs(RIDAGEYR, df = 7)2             0.57984    0.57459   1.009  0.31296    
## bs(RIDAGEYR, df = 7)3             1.09208    0.62603   1.744  0.08114 .  
## bs(RIDAGEYR, df = 7)4             3.79216    0.54297   6.984 3.24e-12 ***
## bs(RIDAGEYR, df = 7)5             6.41928    0.70722   9.077  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             8.88023    0.77175  11.507  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.79530    0.85704  10.262  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -1.63395    4.78656  -0.341  0.73284    
## bs(BMXBMI, df = 7)2               9.73041    3.35782   2.898  0.00377 ** 
## bs(BMXBMI, df = 7)3              16.59364    3.91007   4.244 2.24e-05 ***
## bs(BMXBMI, df = 7)4              21.45620    3.98397   5.386 7.55e-08 ***
## bs(BMXBMI, df = 7)5              37.74824    5.18439   7.281 3.83e-13 ***
## bs(BMXBMI, df = 7)6              43.60914    6.48833   6.721 2.01e-11 ***
## bs(BMXBMI, df = 7)7              46.73453    8.10581   5.766 8.63e-09 ***
## RIAGENDRMale                     -0.53496    5.81346  -0.092  0.92669    
## BMXWT                             0.34186    0.04411   7.750 1.11e-14 ***
## BMXHT                             0.02057    0.04491   0.458  0.64706    
## BMXARML                          -0.09566    0.04866  -1.966  0.04938 *  
## bs(BMXBMI, df = 7)1:RIAGENDRMale  3.01879    8.14519   0.371  0.71093    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.08485    5.35161  -0.203  0.83937    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  1.36000    5.97768   0.228  0.82003    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  1.67878    5.77521   0.291  0.77130    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  2.76089    6.27043   0.440  0.65974    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.75667    7.27387   1.616  0.10610    
## bs(BMXBMI, df = 7)7:RIAGENDRMale  2.30877    9.73089   0.237  0.81246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.076 on 4974 degrees of freedom
## Multiple R-squared:  0.8987, Adjusted R-squared:  0.8982 
## F-statistic:  1766 on 25 and 4974 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# BMXLEG: upper leg length (cm).
# Append BMXLEG as one more term to the shared base formula string and pass
# the result to run_model(); the lm() summary it prints follows below.
run_model(paste0(base_form, "BMXLEG"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5270  -3.2304   0.0284   3.1615  22.0200 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      48.20742    6.62127   7.281 3.84e-13 ***
## bs(RIDAGEYR, df = 7)1            -0.82682    0.76666  -1.078 0.280878    
## bs(RIDAGEYR, df = 7)2             0.36717    0.56243   0.653 0.513897    
## bs(RIDAGEYR, df = 7)3             0.63721    0.61344   1.039 0.298971    
## bs(RIDAGEYR, df = 7)4             2.95979    0.53436   5.539 3.20e-08 ***
## bs(RIDAGEYR, df = 7)5             5.45059    0.69425   7.851 5.02e-15 ***
## bs(RIDAGEYR, df = 7)6             7.45079    0.76002   9.803  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             8.35203    0.83722   9.976  < 2e-16 ***
## bs(BMXBMI, df = 7)1              -3.67781    4.68661  -0.785 0.432638    
## bs(BMXBMI, df = 7)2               8.58713    3.28714   2.612 0.009020 ** 
## bs(BMXBMI, df = 7)3              14.75803    3.82870   3.855 0.000117 ***
## bs(BMXBMI, df = 7)4              19.64720    3.90089   5.037 4.91e-07 ***
## bs(BMXBMI, df = 7)5              35.13904    5.07685   6.921 5.04e-12 ***
## bs(BMXBMI, df = 7)6              39.83430    6.35520   6.268 3.97e-10 ***
## bs(BMXBMI, df = 7)7              44.25840    7.93484   5.578 2.57e-08 ***
## RIAGENDRMale                     -1.71525    5.69009  -0.301 0.763087    
## BMXWT                             0.35028    0.04313   8.122 5.75e-16 ***
## BMXHT                             0.11264    0.04374   2.575 0.010047 *  
## BMXLEG                           -0.42902    0.02873 -14.932  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale  5.37460    7.97322   0.674 0.500291    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.27623    5.23787  -0.053 0.957944    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  2.72980    5.85104   0.467 0.640842    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  2.79959    5.65264   0.495 0.620430    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  3.92119    6.13725   0.639 0.522907    
## bs(BMXBMI, df = 7)6:RIAGENDRMale 14.42007    7.11988   2.025 0.042887 *  
## bs(BMXBMI, df = 7)7:RIAGENDRMale -0.63900    9.52549  -0.067 0.946518    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.967 on 4974 degrees of freedom
## Multiple R-squared:  0.903,  Adjusted R-squared:  0.9025 
## F-statistic:  1853 on 25 and 4974 DF,  p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# grid.arrange(g1, g2,g3, nrow=3)